* D:\E\replications\BJPS2020\rawdata\cnef\cnef_1970-2014.do

* cd D:\E\replications\BJPS2020/
* Start from an empty session and open a fresh text log for the data-assembly step.
clear all
capture log close
log using "./rawdata/cnef/cnef_1970-2014_data.txt", text replace

* Record the run time, the Stata build, and the installed user-written packages.
display c(current_time) "  " c(current_date)
about
ado dir

set more off
***********
* SHP
***********
* Stack the yearly shpequiv files for 1999-2017 into one long dataset.
* Within each yearly file a trailing _YYYY is stripped from the variable names
* so that the same variable lines up across years when the files are appended.

clear all
local shp_dir "./rawdata/cnef/licensed/SHP-Data-CNEF-STATA"
forvalues yr = 1999/2017 {
	preserve
		* earlier locations of the yearly files:
		*   C:\Dropbox\Rehm\Data\SHP\SHP-17\SHP-Data-CNEF-STATA
		*   C:\Dropbox\Rehm\Data\SHP\SHP-Data-CNEF-W1-W18-STATA\SHP-Data-CNEF-STATA
		use "`shp_dir'/shpequiv_`yr'.dta", clear
		generate file = "shpequiv_`yr'.dta"
		generate year = `yr'
		rename *_`yr' *
		compress
		tempfile shp_wave
		save `shp_wave'
	restore
	append using `shp_wave'
}
note: ./rawdata/cnef/licensed/SHP-Data-CNEF-STATA/shpequiv_xxxx.dta
generate ccode = 225
generate source = "SHP"

********
* GSOEP
********
* Load the CNEF version of the GSOEP/SOEP (pequiv), tag it, and append it to
* the data already in memory.
preserve
	* earlier locations of this file:
	*   C:\Dropbox\Rehm\Data\GSOEP\gsoep1984-2015\SOEP-LONG_v32.1i_stata_bilingual/pequiv
	*   D:\E\gsoep\gsoep1984-2017\STATA_DEEN_95_v34\soep.v34i\stata_de+en\pequiv.dta
	use "./rawdata/cnef/licensed/soep.v34i/pequiv.dta", clear
	summarize i11101
	generate ccode = 255
	* the spelling of the source tag is kept exactly as it was
	generate source = "GSEOP"
	* the survey-year variable is called syear in this file
	rename syear year
	note: ./rawdata/cnef/licensed/soep.v34i/pequiv.dta
	compress
	tempfile soep_part
	save `soep_part'
restore
append using `soep_part'

********
* HILDA 17
********
* Load the HILDA CNEF long file, tag it, and append it to the data in memory.
preserve
	* earlier locations / versions of this file:
	*   cd C:\Dropbox\Rehm\Data\hilda\hilda14
	*   "./zip/STATA 140c/CNEF_Long_n140c.dta"
	*   ./HEQUIV_long/hequiv_long.dta
	*   "C:\Dropbox\Rehm\Data\hilda\hilda16\CNEF_Long_p160c.dta"
	use "./rawdata/cnef/licensed/hilda17/CNEF_Long_q170c.dta", clear

	* one record per person (xwaveid) and year is required
	isid year xwaveid

	* strip the zz prefix from variable names
	renpfix zz
	lookfor i11101

	* keep an untouched copy of x11102 before converting it to numeric
	clonevar org_x11102 = x11102
	destring x11102, replace

	generate ccode = 900
	generate source = "HILDA"
	note: ./rawdata/cnef/licensed/hilda17/CNEF_Long_q170c.dta
	note: "2. STATA 170c (Zip File 2 of 2 - Other Data Files).zip"
	compress
	tempfile hilda_part
	save `hilda_part'
restore
append using `hilda_part'


******
* PSID
******
* Load the PSID CNEF long file, keep records with a positive nonmissing
* w11101, tag them, and append them to the data in memory.
preserve
	* earlier locations of this file:
	*   cd C:\Dropbox\Rehm\Data\CNEF\cnef_1970-2013\psid ; PEQUIV_long.dta
	*   E:\CNEF\PEQUIV_long\pequiv_long.dta
	*   ./PEQUIV_long/pequiv_long.dta
	use "./rawdata/cnef/licensed/cnef_psid/pequiv_long.dta", clear

	* same filter as: keep if w11101>0 & w11101<.
	drop if w11101 <= 0 | w11101 >= .

	generate ccode = 2
	* note: C:\Dropbox\Rehm\Data\CNEF\cnef_1970-2013\psid\PEQUIV_long.dta
	note: ./rawdata/cnef/licensed/cnef_psid/pequiv_long.dta
	note: was ./Rehm/Data/CNEF/cnef_1970-2014/PEQUIV_long/pequiv_long.dta
	generate source = "PSID"
	compress
	tempfile psid_part
	save `psid_part'
restore
append using `psid_part'

******
* BHPS
******
* Load the BHPS CNEF long file, keep records with a positive nonmissing
* w11101, tag them, and append them to the data in memory.
preserve
	*use C:\Dropbox\Rehm\Data\CNEF\cnef_1970-2011/bhps/bequiv_long.dta, clear
	*use ./BEQUIV/bequiv_long.dta, clear
	use ./rawdata/cnef/licensed/cnef_bhps/bequiv_long.dta, clear
	* Lower-case all variable names BEFORE filtering on w11101, so the filter
	* does not depend on the case of the incoming names (same order as in the
	* UKLHS section). The built-in group rename replaces the user-written
	* renvars command, removing one external dependency.
	rename *, lower
	keep if w11101>0 & w11101<.
	* a leftover merge indicator may or may not be present in the file
	cap drop _merge
	gen ccode=200
	note: ./rawdata/cnef/licensed/cnef_bhps/bequiv_long.dta
	note: was ./Rehm/Data/CNEF/cnef_1970-2014/BEQUIV/bequiv_long.dta
	gen source="BHPS"
	compress
	tempfile t
	save `t'
restore
append using `t'

******
* UKLHS / Understanding Society
******
* Load the UKLHS CNEF file, keep records with a positive nonmissing w11101,
* tag them, and append them to the data in memory.
* ccode is the same as for BHPS (200); the two are told apart by source.
preserve
	*use ./UKEQUIV_long/UKEQUIV.dta, clear
	use ./rawdata/cnef/licensed/cnef_uklhs/UKEQUIV.dta, clear
	* Lower-case all variable names with the built-in group rename instead of
	* the user-written renvars command (one external dependency less).
	rename *, lower
	keep if w11101>0 & w11101<.
	* a leftover merge indicator may or may not be present in the file
	cap drop _merge
	gen ccode=200
	note: ./rawdata/cnef/licensed/cnef_uklhs/UKEQUIV.dta
	note: was .\Rehm\Data\CNEF\cnef_1970-2014\UKEQUIV_long\UKEQUIV.dta
	gen source="UKLHS"
	compress
	tempfile t
	save `t'
restore
append using `t'


* Flag records whose w11101 is missing and show the share (and N) by ccode.
generate mv_weight = missing(w11101)
tabstat mv_weight, by(ccode) statistics(mean N)

summarize w11101
* no filtering on w11101 here; the ESI step applies it when it re-reads the file
*keep if w11101>0 & w11101<.

* Coverage checks: records by country code and year, and the year span per ccode.
tabulate ccode, missing
tabulate year, missing
tabstat year, by(ccode) statistics(min max N)

* Store the stacked micro file together with provenance notes.
compress
note: ./rawdata/cnef/cnef_1970-2014.do
note: Created on `= c(current_date)'
save "./rawdata/cnef/licensed/cnef_micro.dta", replace

display c(current_time) "  " c(current_date)
capture log close



****************
* Calculate ESIs
****************

* Separate text log for the ESI step; close any log that is still open first.
capture log close
log using "./rawdata/cnef/cnef_1970-2014_ESIs.txt", text replace
display c(current_time) "  " c(current_date)
about
ado dir

set more off

* Read back only the records with a positive nonmissing w11101.
use if w11101 > 0 & !missing(w11101) using "./rawdata/cnef/licensed/cnef_micro.dta", clear

generate dataset = "CNEF"

* Person identifier (pid):
*   - default is x11101ll
*   - ccode 900 uses xwaveid, converted from string to numeric
*   - ccode 2 (and 365, which no section of this file assigns) uses x11101LL
destring xwaveid, generate(pid_aul)
capture drop pid
clonevar pid = x11101ll
replace pid = pid_aul if ccode == 900
replace pid = x11101LL if ccode == 2 | ccode == 365
summarize ccode pid year

tabstat pid year, by(ccode) statistics(N)

* Records without a person id cannot be followed over time.
drop if pid == .
isid ccode pid year

* Panel id. source is part of the grouping, so that BHPS does not get carried
* forward into UKHLS, even though it could (income variables are different).
egen id = group(source ccode pid)
isid id year

* Working copies of the variables used below.
clonevar age    = d11101
clonevar HHsize = d11106
rename w11101 weight

* Household income before (preY) and after (postY) taxes and transfers.
* For ccode 2 the post measure is taken from i11113 instead of i11102.
clonevar preY  = i11101
clonevar postY = i11102
replace postY = i11113 if ccode == 2

// UKLHS has 'income last month' only, so scale it to a yearly amount
foreach inc in preY postY {
	replace `inc' = `inc' * 12 if source == "UKLHS"
}

* Attach the CPI by country-year; CPI rows without micro data are not kept.
capture drop _merge
*merge m:1 ccode year using "C:\Dropbox\Rehm\Data\OECD\cpi\cpi2017.dta"
merge m:1 ccode year using "./rawdata/cnef/OECD_CPI.dta", keep(master match) nogenerate

generate equivalence = sqrt(HHsize)
label variable equivalence "Equivalence scale: sqrt(HHsize)"

* Income concepts: deflated by the CPI, defined for positive incomes only.
generate nY = postY / cpi if postY > 0
generate gY = preY / cpi if preY > 0
label variable nY "HH yearly net income, not equivl."
label variable gY "HH yearly market income, not equivl."

* Equivalised versions.
generate nYe = nY / equivalence
generate gYe = gY / equivalence
label variable nYe "HH yearly net income, equivl."
label variable gYe "HH yearly market income, equivl."

** income changes
* Top- and bottom-coding at p1 and p99: within each ccode-year the weighted
* 99th and 1st percentiles are computed, and values beyond them are set to the
* respective percentile. Missing incomes are left missing.
foreach inc in nYe gYe {
	bysort ccode year: egen PCTTOP_`inc' = wpctile(`inc'), p(99) weights(weight)
	bysort ccode year: egen PCTBOT_`inc' = wpctile(`inc'), p(1) weights(weight)
	replace `inc' = PCTTOP_`inc' if `inc' > PCTTOP_`inc' & !missing(`inc')
	replace `inc' = PCTBOT_`inc' if `inc' < PCTBOT_`inc' & !missing(`inc')
}
*drop PCTTOP* PCTBOT*

* d11108 restricted to the codes 1 to 3; everything else becomes missing.
generate isced3 = d11108 if d11108 >= 1 & d11108 <= 3

* income drops
* For each equivalised income concept (nYe, gYe) this section creates:
*   gini_*_2560 / gini_*_all : weighted Gini by country-year
*   d1_*                     : year-on-year change relative to last year's level
*   d*                       : arc change (change relative to the two-year mean)
*   ESIxx_* / upESIxx_*      : indicators for arc drops / gains of at least xx percent
*   gESIxx_* / upgESIxx_*    : the same indicators based on d1_*
* The 00 versions flag any strict drop or gain.
egen group=group(ccode year), label
* Declare the panel once, before the loop: the declaration does not depend on
* the income variable, and the l1. and d1. operators below only need it to be set.
xtset id year, yearly
foreach v in nYe gYe {
	*egen gini0_`v' = inequal(`v') if age>=25 & age<=60, by(group) weight(weight) index(gini)
	*replace `v'=1 if `v'>=0 & `v'<1 // for well-defined ginis. Do we want to do this??
	egen gini_`v'_2560 = inequal(`v') if age>=25 & age<=60, by(group) weight(weight) index(gini)
	egen gini_`v'_all = inequal(`v'), by(group) weight(weight) index(gini)
	cap drop d1_`v'
	gen d1_`v'=d1.`v'/abs(l1.`v') if `v'<. & l1.`v'<.
	label var  d1_`v' "Growth in `v'"
	cap drop d`v'
	gen d`v'=d1.`v'/(abs(l1.`v'+`v')/2) if `v'<. & l1.`v'<. // arc changes
	label var d`v' "Arc change of `v'"
	gen ESI25_`v'=(d`v'<=-0.25) if d`v'<.
	gen ESI50_`v'=(d`v'<=-0.5) if d`v'<.
	gen ESI10_`v'=(d`v'<=-0.1) if d`v'<.
	gen ESI00_`v'=(d`v'<0) if d`v'<.
	gen upESI25_`v'=(d`v'>=0.25) if d`v'<.
	gen upESI50_`v'=(d`v'>=0.5) if d`v'<.
	gen upESI10_`v'=(d`v'>=0.1) if d`v'<.
	gen upESI00_`v'=(d`v'>0) if d`v'<.

	gen gESI25_`v'  =(d1_`v'<=-0.25) if d1_`v'<.
	gen gESI50_`v'  =(d1_`v'<=-0.5)  if d1_`v'<.
	gen gESI10_`v'  =(d1_`v'<=-0.1)  if d1_`v'<.
	gen gESI00_`v'  =(d1_`v'<0)  if d1_`v'<.
	gen upgESI25_`v'=(d1_`v'>=0.25)  if d1_`v'<.
	gen upgESI50_`v'=(d1_`v'>=0.5)   if d1_`v'<.
	gen upgESI10_`v'=(d1_`v'>=0.1)   if d1_`v'<.
	gen upgESI00_`v'=(d1_`v'>0)   if d1_`v'<.

	if "`v'"=="nYe" local foo "HH disposable income"
	else if "`v'"=="gYe" local foo "HH market income"
	label var gini_`v'_2560 "Gini of `foo', ages 25-60"
	label var gini_`v'_all "Gini of `foo', ages all"
	foreach j in 00 10 25 50 {
		label var ESI`j'_`v' "`j'+% arc drop, `foo'"
		label var upESI`j'_`v' "`j'+% arc gain, `foo'"
		label var gESI`j'_`v' "`j'+% drop, `foo'"
		label var upgESI`j'_`v' "`j'+% gain, `foo'"
	}
}

// Joint incidence of MI + DI drops
* compESIxx / compgESIxx combine the market-income (gYe) and disposable-income
* (nYe) drop indicators into one four-category variable:
*   1 = (0,0)   2 = (0,1)   3 = (1,0)   4 = (1,1)     in (gYe, nYe) order
* The code is computed directly from the two indicators rather than by
* numbering the combinations that happen to occur, so a code always means the
* same combination even if one of the four cells is empty in the data. The
* RRxx_m dummies and their labels further down rely on exactly this mapping.
* The result is missing whenever either indicator is missing.
foreach j in 00 10 25 50 {
	gen byte compESI`j'=1+2*ESI`j'_gYe+ESI`j'_nYe
	gen byte compgESI`j'=1+2*gESI`j'_gYe+gESI`j'_nYe
	label define compESI`j' 1 "0 0" 2 "0 1" 3 "1 0" 4 "1 1", replace
	label define compgESI`j' 1 "0 0" 2 "0 1" 3 "1 0" 4 "1 1", replace
	label values compESI`j' compESI`j'
	label values compgESI`j' compgESI`j'
	label var compESI`j' "group(ESI`j'_gYe ESI`j'_nYe)"
	label var compgESI`j' "group(gESI`j'_gYe gESI`j'_nYe)"
}
label list compESI25

* One dummy per combination, for the arc-based (RRxx_m) and the growth-based
* (RRxx_gm) indicators; missing where the combined variable is missing.
foreach j in 00 10 25 50 {
	foreach m in 1 2 3 4 {
		gen RR`j'_`m'= (compESI`j'==`m') if compESI`j'<.
		gen RR`j'_g`m'=(compgESI`j'==`m') if compgESI`j'<.
	}
	label var RR`j'_1   "MI+DI drops (ESI`j'_gYe=0 + ESI`j'_nYe==0)"
	label var RR`j'_2   "MI+DI drops (ESI`j'_gYe=0 + ESI`j'_nYe==1)"
	label var RR`j'_3   "MI+DI drops (ESI`j'_gYe=1 + ESI`j'_nYe==0)"
	label var RR`j'_4   "MI+DI drops (ESI`j'_gYe=1 + ESI`j'_nYe==1)"
	label var RR`j'_g1 "MI+DI drops (gESI`j'_gYe=0 + gESI`j'_nYe==0)"
	label var RR`j'_g2 "MI+DI drops (gESI`j'_gYe=0 + gESI`j'_nYe==1)"
	label var RR`j'_g3 "MI+DI drops (gESI`j'_gYe=1 + gESI`j'_nYe==0)"
	label var RR`j'_g4 "MI+DI drops (gESI`j'_gYe=1 + gESI`j'_nYe==1)"
}

compress
* Checkpoint of the person-level file with all indicators.
* This used to be written to C:\Users\rehm.16\Desktop\tmp\cnef\cnef_tmp5.dta,
* a machine-specific folder that makes the script stop with an error anywhere
* else. It now goes next to cnef_micro.dta: that folder is already written to
* earlier in this script, and the file contains licensed micro data.
save ./rawdata/cnef/licensed/cnef_tmp5.dta, replace

* Copies of the equivalised incomes that the collapse steps turn into the
* weighted mean (my_*) and standard deviation (sd_*) per country-year.
foreach v of varlist nYe gYe {
	clonevar my_`v'=`v'
	clonevar sd_`v'=`v'
	label var my_`v' "`: var label `v'' (mean)"
	label var sd_`v' "`: var label `v'' (SD)"
}

* Country-year file for ages 25-60: weighted SDs and means of incomes, means of
* the Gini, ESI and RR variables, and median drop sizes among those with a drop.
* Works on a preserved copy; the person-level data are restored afterwards.
preserve
	* ages 25-60
	keep if age>=25 & age<=60

	* Drop size, defined only for records that experienced the drop:
	*   dxx_*    arc change, where the arc-based ESIxx indicator is 1
	*   d1_xx_*  relative change d1_*, where the growth-based gESIxx indicator is 1
	* d1_xx_* previously copied the arc change as well, which did not match the
	* indicator it is conditioned on; it now uses d1_*, the variable gESIxx is
	* defined from.
	foreach j in 00 10 25 50 {
		foreach v in nYe gYe {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d1_`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}

	* collapse discards variable labels: remember them here, re-attach them below
	foreach v of varlist _all {
		local l`v': var label `v'
	}
	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_?Ye  d1_??_?Ye [aw=weight], by(ccode year source)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen ages="25-60"
	gen dataset="CNEF"
	compress
	note: ./rawdata/cnef/cnef_1970-2014.do
	note: ./rawdata/cnef/ESIs_ages25-60_cnef.dta
	note: Created on `= c(current_date)'
	saveold ./rawdata/cnef/ESIs_ages25-60_cnef.dta, replace
restore

* collapse to get drops
* Country-year file for all ages: weighted SDs and means of incomes, means of
* the Gini, ESI and RR variables, and median drop sizes among those with a drop.
* Works on a preserved copy; the person-level data are restored afterwards.
preserve
	* all ages

	* Drop size, defined only for records that experienced the drop:
	*   dxx_*    arc change, where the arc-based ESIxx indicator is 1
	*   d1_xx_*  relative change d1_*, where the growth-based gESIxx indicator is 1
	* d1_xx_* previously copied the arc change as well, which did not match the
	* indicator it is conditioned on; it now uses d1_*, the variable gESIxx is
	* defined from.
	foreach j in 00 10 25 50 {
		foreach v in nYe gYe {
			gen d`j'_`v'=d`v' if ESI`j'_`v'==1
			label var d`j'_`v' "Median drop size if ESI`j'_`v'==1"
			gen d1_`j'_`v'=d1_`v' if gESI`j'_`v'==1
			label var d1_`j'_`v' "Median drop size if gESI`j'_`v'==1"
		}
	}

	* collapse discards variable labels: remember them here, re-attach them below
	foreach v of varlist _all {
		local l`v': var label `v'
	}
	collapse (sd) sd_* (mean) my_* gin*_* *ESI??_* RR??_* (median) d??_?Ye  d1_??_?Ye [aw=weight], by(ccode year source)
	foreach v of varlist _all {
		label var `v' "`l`v''"
	}
	gen ages="all"
	gen dataset="CNEF"
	compress
	note: ./rawdata/cnef/cnef_1970-2014.do
	note: ./rawdata/cnef/ESIs_cnef.dta
	note: Created on `= c(current_date)'
	saveold ./rawdata/cnef/ESIs_cnef.dta, replace
restore


* Time stamp for the end of the run, then close the ESI log.
display "$S_TIME  $S_DATE"
cap log close

